# MECHANISMS ---------------------------------------------------------------

set.seed(12345)

# WTP / item calculation ---------------------------------------------------------------

r2_choices_with_item_val %>%

  filter(discuss_type %in% c("control", "discussion_full")) %>%

  ggplot(aes(x = item_diff_value, y = as.numeric(r2_choose_comparator),
             colour = pair_includes_trans_label, ymin = NULL, ymax = NULL,
             fill = pair_includes_trans_label)) +

  # Shaded ribbon
  geom_smooth (method = "lm", se = TRUE, alpha = 0.2, size = 0) +

  # Line of best fit
  stat_smooth (
    aes(x = item_diff_value, y = as.numeric(r2_choose_comparator), colour = pair_includes_trans_label),
    method = "lm", se = FALSE,
    geom="line", size=1
  ) +
  facet_wrap(~discuss_type_label) +
  theme_bw() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major = element_blank()) +
  labs(x = "Difference in value of items (Rs.)\n[Alterntative worker - Benchmark worker]", colour = element_blank(), fill = element_blank(),
       y = "Prob(Chooses alternative worker)") +
  scale_y_continuous(labels = scales::percent_format()) +

  geom_point(
    data = item_diff_summ %>% filter(discuss_type %in% c("control", "discussion_full")),
    aes(x = item_diff_value, y = y, colour = pair_includes_trans_label),
    alpha = 0.5
  ) +
  geom_errorbar(
    data = item_diff_summ %>% filter(discuss_type %in% c("control", "discussion_full")),
    aes(x = item_diff_value, y = y, colour = pair_includes_trans_label, ymin = ymin, ymax = ymax),
    alpha = 0.5,
    width = 0
  )

ggsave("outputs/figs/wtp_by_item_diff.pdf", width = 6, height = 4)

# What is distaste for trans
model_control <- feols(
  r2_choose_comparator ~ pair_includes_trans + item_diff_value,
  data = r2_choices_with_item_val %>% filter(discuss_type == "control"),
  fixef = NULL,
  cluster = "group_id"
) # 17pp discrimination against trans (or 7 pp if vs male only)

print(model_control)

model_treat <- feols(
  r2_choose_comparator ~ pair_includes_trans + item_diff_value,
  data = r2_choices_with_item_val %>% filter(discuss_type == "discussion_full"),
  cluster = "group_id"
) # reduces to 3pp discrimination against trans (or +9 pp positive discrim if vs male only)

model_treat %>% get_p_val("pair_includes_trans") %>% write_stat("outputs/stats/wtp_treat_p_val.tex", digits = 3, p_value = TRUE)

# P-value of diff
feols(
  r2_choose_comparator ~ discuss_type * pair_includes_trans + item_diff_value,
  data = r2_choices_with_item_val %>% filter(discuss_type %in% c("discussion_full", "control")),
  # fixef = "stratum_id",
  cluster = "group_id"
) %>%
  get_p_val("discuss_typediscussion_full:pair_includes_trans") %>%
  write_stat("outputs/stats/diff_wtp_p_val.tex", digits = 3, p_value = TRUE)

print(model_treat)

# What is the gradient of item_val to
model_item_gradient <- feols(
  r2_choose_comparator ~ pair_includes_trans * group_label + item_diff_value,
  data = r2_choices_with_item_val %>% filter(discuss_type %in% c("control", "discussion_full")),
  cluster = "group_id"
)
# 18pp gradient of item_diff_value_rel(perception),
# or 10pp gradient wrt true value
# so basically sacrifciing a day's hh food expenditure to not
# reduces to 0.018746

(-model_control$coefficients[["pair_includes_trans"]]) %>% write_stat("outputs/stats/baseline_discrim_raw.tex") # 0.172338 to avoid trans
(-model_treat$coefficients[["pair_includes_trans"]]) %>% write_stat("outputs/stats/treatment_discrim_raw.tex")  # reduces to 0.018746

model_item_gradient$coefficients[["item_diff_value"]] %>%  # gradient wrt items value (one Rs.)
  # {. * 100} %>%
  write_stat("outputs/stats/item_value_gradient_raw.tex", digits = 4)

model_item_gradient$coefficients[["item_diff_value"]] %>%  # gradient wrt items value (one Rs.)
{. * 100 * 100} %>%
  write_stat("outputs/stats/item_value_gradient_pp.tex", digits = 0)


# Initial WTP to sacrifice:
wtp_control <- - (model_control$coefficients[["pair_includes_trans"]] / model_item_gradient$coefficients[["item_diff_value"]])

wtp_control %>%
  write_stat("outputs/stats/wtp_control.tex", digits = 0)

(wtp_control / median_hh_exp) %>%
  write_stat("outputs/stats/wtp_control_hh_exp.tex", digits = 1)

# Post treatment WTP
- (model_treat$coefficients[["pair_includes_trans"]] / model_item_gradient$coefficients[["item_diff_value"]]) %>%
  write_stat("outputs/stats/wtp_treat.tex", digits = 0)

# COMPARE TO RELIABILITY SCORE
model_reliability_gradient <- feols(
  r2_choose_comparator ~ pair_includes_trans * group_label + item_diff_value + r2_reliability_diff + r2_reliability_shown,
  data = r2_choices_with_item_val %>% filter(discuss_type %in% c("control", "discussion_full")),
  # fixef = "stratum_id",
  cluster = "group_id"
)

gradient_item <- model_reliability_gradient %>% get_coeff("item_diff_value")
gradient_reliability <- model_reliability_gradient %>% get_coeff("r2_reliability_diff")

wtp_for_reliability <- gradient_reliability / gradient_item

wtp_for_reliability %>%
  write_stat("outputs/stats/wtp_for_reliability.tex", digits = 0)

# Attitudes & beliefs ---------------------------------------------------------------

att_models <- list(
  "# statements agreed with\n(list experiment)" = feols_custom(
    list_answer ~ trans_in_list_group + trans_in_list_group:discussion_full + video_type + list_b + trans_in_list_group,
    fixef = c("ind_id", "stratum_id", "video_type", "list_b", "phase", "list_order_first"),
    cluster = "ind_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("discussion_full"),
      interact = NULL,
      group_control = FALSE
    ),
    data = list_exp %>% filter(discuss_type %in% c("control", "discussion_full")),
    warn = FALSE, notes = FALSE,
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = "trans_in_list_group"
  ),
  "Disapproves of\ndiscrimination (=1)" = feols_custom(
    attitude_val ~ discussion_full,
    fixef = c("stratum_id", "attitude_type", "video_type", "phase"),
    cluster = "ind_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("discussion_full"),
      interact = NULL,
      group_control = FALSE
    ),
    data = atts_long %>% filter(discuss_type %in% c("control", "discussion_full")),
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = "discussion_full"
  ),
  "Likely or very likely\nto complete delivery (=1)" = feols_custom(
    likely ~ trans_photo_yn * discussion_full  + factor(stratum_id) + trans_photo_yn + video_type * trans_photo_yn,
    fixef = c("ind_id", "stratum_id", "likelihood_order", "video_type", "phase"),
    cluster = "ind_id",
    data = likelihood %>% filter(discuss_type %in% c("control", "discussion_full")),
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("discussion_full"),
      interact = "trans_photo_yn",
      group_control = FALSE
    ),
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = "^(discussion_full|trans_photo_yn|trans_photo_yn:discussion_full)$"
  )
)

p_vals_atts <- c(
  att_models[[1]] %>% tidy %>% filter(term == "trans_in_list_group:discussion_full") %>% pull(p.value),
  att_models[[2]] %>% tidy %>% filter(term == "discussion_full") %>% pull(p.value)
)

q_vals_atts <- q_val(p_vals_atts)

tex_export(
  att_models,
  file = "outputs/tables/attitudes.tex",
  coef_omit = vars_to_regex("stratum_id|Intercept|video_type|list_b|phase", control_vars),
  coef_rename = coef_label,
  coef_reorder = c("trans_in_list_group:discussion_full", "trans_in_list_group",
                   "discussion_full",
                   "trans_photo_yn:discussion_full", "trans_photo_yn"),
  gof_map = fe_label %>% mutate(fmt = ifelse(raw == "Control mean", 2, fmt)) %>%
    filter(!str_detect(raw, "LASSO|stratum|video|phase")),
  dep_means = list("Mean: No discussion (private)" = "discuss_type == 'control'"),
  add_rows = combine_add_rows(
    list_to_add_rows(list("Controls" = rep("X", 3))),
    vec_to_add_rows(
      c("q-value of treatment effect", q_vals_atts[[1]], q_vals_atts[[2]], "")
    )
  )
)

atts_long %>% filter(discuss_type %in% c("control")) %>%
  pull(attitude_val) %>%
  mean_na() %>%
  write_percentage("outputs/stats/prop_attitude_control.tex", digits = 1)

atts_long %>% filter(discuss_type %in% c("control")) %>%
  pull(attitude_val) %>%
  mean_na() %>%
  write_percentage("outputs/stats/prop_attitude_control_round.tex", digits = 0)

atts_long %>% filter(discuss_type %in% c("discussion_full")) %>%
  pull(attitude_val) %>%
  mean_na() %>%
  write_percentage("outputs/stats/prop_attitude_treat.tex", digits = 1)

att_models[[2]] %>% get_p_val("discussion_full") %>% write_stat("outputs/stats/attitude_p_val.tex", digits = 2)

# Beliefs about reliability
(-100 * att_models[[3]]$coefficients["trans_photo_yn"]) %>%
  write_stat("outputs/stats/effect_belief_reliability.tex", digits = 1)

att_models[[3]] %>% get_p_val("trans_photo_yn") %>%
  write_stat("outputs/stats/p_val_belief_reliability.tex", digits = 3, p_val = TRUE)

att_models[[3]] %>% get_p_val("trans_photo_yn:discussion_full") %>%
  write_stat("outputs/stats/p_val_belief_reliability_discussion.tex", digits = 2, p_val = TRUE)

# Get p-value for the list experiment (column 1)
att_models[[1]] %>% get_p_val("trans_in_list_group:discussion_full") %>%
  write_stat("outputs/stats/p_val_list_experiment.tex", digits = 2, p_val = TRUE)

sd_attitude <- atts_long %>% filter(discuss_type == "control") %>%
  pull(attitude_val) %>%
  sd(na.rm = TRUE)

att_models[[2]] %>% get_coeff("discussion_full") %>%
{. / sd_attitude} %>%
  write_stat("outputs/stats/attitude_effect_sd.tex")

# Norms ---------------------------------------------------------------

set.seed(12345)
norm_models <- list(
  "Predicted share of people\nthat pick trans\n(community)" = feols_custom(
    n3 ~ discussion_full,
    fixef = c("stratum_id", "video_type", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("discussion_full"),
      interact = NULL,
      group_control = FALSE
    ),
    data = sn %>% filter(discuss_type %in% c("control", "discussion_full")),
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = "discussion_full"
  ),
  "Predicts that other picks trans (=1)\n(within group-of-3)" = feols_custom(
    fml = group_predic_choose_trans ~ discussion_full + item_diff + reliability_diff + reliability_shown,
    fixef = c("stratum_id", "video_type", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("discussion_full"),
      interact = NULL,
      group_control = FALSE
    ),
    data = group_predic %>% filter(discuss_type %in% c("control", "discussion_full")) %>%
      filter(!is.na(group_predic_choose_trans)),
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = "discussion_full"
  )
)

p_vals_norms <- c(
  norm_models[[1]] %>% tidy %>% filter(str_detect(term, "discussion_full")) %>% pull(p.value),
  norm_models[[2]] %>% tidy %>% filter(str_detect(term, "discussion_full")) %>% pull(p.value)
)

q_vals_norms <- q_val(p_vals_norms)

p_vals_norms[[2]] %>% write_stat("outputs/stats/p_val_group_norms.tex", digits = 2, p_val = TRUE)


tex_export(
  norm_models,
  file = "outputs/tables/norms.tex",
  coef_omit = vars_to_regex("stratum_id|Intercept|video_type|n1|n2|b11|item_diff|reliability_", control_vars),
  coef_rename = coef_label,
  gof_map = fe_label_no_fe,
  dep_means = list("Mean: No discussion (private)" = "discuss_type == 'control'"),
  add_rows = combine_add_rows(
    list_to_add_rows(list("Controls" = rep("X", 2))),
    vec_to_add_rows(
      c("q-value of treatment effect", q_vals_norms[[1]], q_vals_norms[[2]])
    )
  )
)

(norm_models[[1]]$coefficients["discussion_full"] * 100) %>%
  write_stat("outputs/stats/effect_norms.tex", digits = 1)

n3_control_mean <- sn %>% filter(discuss_type %in% c("control", "discussion_full")) %>%
  pull(n3) %>% mean_na()

(norm_models[[1]]$coefficients["discussion_full"] / n3_control_mean) %>%
  write_percentage("outputs/stats/effect_norms_perc.tex", digits = 1)

(norm_models[[2]]$coefficients["discussion_full"] * 100) %>%
  write_stat("outputs/stats/effect_norms_group.tex", digits = 0)

(norm_models[[2]]$coefficients["discussion_full"] * 100) %>%
  write_stat("outputs/stats/effect_norms_group_round.tex", digits = 0)

# Group prediction figure ---------------------------------------------------------------

model_misper <- list(
  "No discussion (private)" = feols_custom(
    fml = misper ~ 1,
    cluster = ~group_id,
    data = group_predic_w_actual_choices %>% filter(discuss_type %in% c("control"), group_predic_includes_trans)
  ),
  "3-person discussion" = feols_custom(
    fml = misper ~ 1,
    cluster = "group_id",
    data = group_predic_w_actual_choices %>% filter(discuss_type %in% c("discussion_full"), group_predic_includes_trans)
  )
) %>%
  map(tidy) %>%
  bind_rows(.id = "discuss_type") %>%
  mutate(label = paste0(
    "Diff: ",
    format(round(100 * estimate, 1), nsmall = 1),
    "p.p.\n",
    "p-value: ",
    ifelse(p.value < 0.001, "<0.001", format(round(p.value, 2), nsmall = 2))
  )) %>%
  print

group_predic_long %>%
  filter(group_predic_includes_trans) %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>%
  bar_chart(x = discuss_type_label, y = value, fill = name, return_data = TRUE) %>%
  left_join(model_misper %>% select(discuss_type_label = discuss_type,
                                    label)) %>%
  mutate(
    discuss_type_label = paste0(discuss_type_label, "\n", label) %>%
      factor() %>%
      fct_rev()
  ) %>%

  ggplot(aes(x = name, y = y, fill = name)) +
  geom_col(aes(colour = name), position = position_dodge(1), width = 0.8, show.legend = FALSE) +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), colour = "#333333", alpha = 0.9, width = 0.2, position = position_dodge(1)) +
  facet_wrap(~discuss_type_label, nrow = 1) +
  scale_color_discrete(type = c("#956cae", "#956cae")) +
  scale_fill_discrete(type = c("#956cae", "white")) +
  coord_cartesian(ylim = c(0, 0.7)) +
  scale_y_continuous(labels = scales::percent, expand = c(0, 0)) +
  labs(x = element_blank(), y = "% chose transgender", fill = element_blank()) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major = element_blank(),
    axis.line = element_line(),
    axis.ticks = element_line(),
    panel.spacing = unit(0.7, "cm")
  ) +
  geom_text(
    aes(x = name, y = 0.03,
        label = str_glue("{format(round(as.numeric(y) * 100, 1), nsmall = 1)}%")),
  )

ggsave("outputs/figs/group_predic.pdf", width = 4.5, height = 4, scale = 1)

p_vals_group_predic_non_trans <- list(
  "No discussion (private)" = feols_custom(
    fml = misper ~ 1,
    fixef = NULL,
    cluster = ~ group_id,
    data = group_predic_w_actual_choices %>% filter(discuss_type %in% c("control"), !group_predic_includes_trans)
  ),
  "3-person discussion" = feols_custom(
    fml = misper ~ 1,
    fixef = NULL,
    cluster = ~ group_id,
    data = group_predic_w_actual_choices %>% filter(discuss_type %in% c("discussion_full"), !group_predic_includes_trans)
  )
) %>%
  map(tidy) %>%
  bind_rows(.id = "discuss_type") %>%
  mutate(label = paste0(
    "Diff: ",
    format(round(100 * estimate, 1), nsmall = 1),
    "p.p.\n",
    "p-value: ",
    ifelse(p.value < 0.001, "<0.001", format(round(p.value, 2), nsmall = 2))
  )) %>%
  print

group_predic_long %>%
  filter(!group_predic_includes_trans) %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>%
  bar_chart(x = discuss_type_label, y = value, fill = name, return_data = TRUE) %>%
  left_join(p_vals_group_predic_non_trans %>% select(discuss_type_label = discuss_type,
                                                     label)) %>%
  mutate(
    discuss_type_label = paste0(discuss_type_label, "\n", label) %>%
      factor() %>%
      fct_rev()
  ) %>%

  ggplot(aes(x = name, y = y, fill = name)) +
  geom_col(aes(colour = name), position = position_dodge(1), width = 0.8, show.legend = FALSE) +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), colour = "#333333", alpha = 0.9, width = 0.2, position = position_dodge(1)) +
  facet_wrap(~discuss_type_label, nrow = 1) +
  scale_color_discrete(type = c("#956cae", "#956cae")) +
  scale_fill_discrete(type = c("#956cae", "white")) +
  coord_cartesian(ylim = c(0, 0.7)) +
  scale_y_continuous(labels = scales::percent, expand = c(0, 0)) +
  labs(x = element_blank(), y = "% chose the alternative worker", fill = element_blank()) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major = element_blank(),
    axis.line = element_line(),
    axis.ticks = element_line(),
    panel.spacing = unit(0.7, "cm")
  ) +
  geom_text(
    aes(x = name, y = 0.03,
        label = str_glue("{format(round(as.numeric(y) * 100, 1), nsmall = 1)}%")),
  )

ggsave("outputs/figs/group_predic_non_trans.pdf", width = 4.5, height = 4, scale = 1)

group_predic_long %>% count_prop(name) %>%
  filter(name == "Actual\nchoices") %>%
  filter(group_predic_includes_trans) %>%
  feols_custom(
    fml = value ~ group_label,
    fixef = "stratum_id",
    cluster = ~ group_id,
    data = .
  )

group_predic_w_actual_choices %>%
  filter(discuss_type == "control", group_predic_includes_trans) %>%
  get_mean(group_predic_choose_trans) %>%
  write_percentage("outputs/stats/group_predic_control.tex", digits = 1)

group_predic_w_actual_choices %>%
  filter(discuss_type == "control", group_predic_includes_trans) %>%
  get_mean(r2_choose_trans) %>%
  write_percentage("outputs/stats/group_predic_actual_control.tex", digits = 1)

# Misperc - different across treat groups? ---------------------------------------------------------------

feols_custom(
  fml = misper ~ discuss_type,
  fixef = NULL,
  cluster = ~ group_id,
  data = group_predic_w_actual_choices %>% filter(discuss_type %in% c("control", "discussion_full"), group_predic_includes_trans)
)

misper_diff <- function(df) {

  misper_control <- feols_custom(
    fml = misper ~ 1,
    cluster = "group_id",
    data = df %>% filter(discussion_full == 0),
    quiet = TRUE
  ) %>%
    get_coeff("(Intercept)")

  misper_treat <- feols_custom(
    fml = misper ~ 1,
    cluster = "group_id",
    data = df %>% filter(discussion_full == 1),
    quiet = TRUE
  ) %>%
    get_coeff("(Intercept)")

  abs(misper_treat) - abs(misper_control)

}

# Misperception accounting ---------------------------------------------------------------

-(model_misper$estimate[[1]] / norm_models[[2]]$coefficients["discussion_full"]) %>%
  write_percentage("outputs/stats/misperc_accounting.tex", digits = 0)

misper_account <- function(df) {
  misper_estimate <- feols_custom(
    fml = misper ~ 1,
    cluster = ~group_id,
    quiet = TRUE,
    data = df %>% filter(discuss_type %in% c("control"), group_predic_includes_trans)
  )

  belief_update_estimate <- feols_custom(
    fml = group_predic_choose_trans ~ discussion_full + item_diff + reliability_diff + reliability_shown,
    fixef = c("stratum_id", "video_type", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("discussion_full"),
      interact = NULL,
      group_control = FALSE
    ),
    quiet = TRUE,
    data = df %>% filter(discuss_type %in% c("control", "discussion_full")) %>%
      filter(!is.na(group_predic_choose_trans))
  )

  perc_estimate <- -(misper_estimate$coefficients[[1]] / belief_update_estimate$coefficients["discussion_full"])

  return(perc_estimate)
}

# Using public arm to calibrate effect of SOBs on discrimination ----------------------------------------------------------------------------------------------------------------

estimate_effect_of_sobs <- function(df_group_predic, df_r2) {
  # 1. Effect of public on SOBs - 6pp
  effect_of_public_sobs <- feols_custom(
    fml = group_predic_choose_trans ~ public + discussion_pair + discussion_full + item_diff + reliability_diff + reliability_shown,
    fixef = c("stratum_id", "video_type"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("discussion_full", "discussion_pair", "public"),
      interact = NULL,
      group_control = FALSE
    ),
    quiet = TRUE,
    data = df_group_predic %>%
      filter(!is.na(group_predic_choose_trans)) %>%
      filter(phase == "phase_2") %>%
      ungroup
  ) %>%
    get_coeff("public")

  # 2. Effect of discussion on SOBs - 24pp
  effect_of_discussion_sobs <- feols_custom(
    fml = group_predic_choose_trans ~ public + discussion_pair + discussion_full + item_diff + reliability_diff + reliability_shown,
    fixef = c("stratum_id", "video_type", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("discussion_full", "discussion_pair", "public"),
      interact = NULL,
      group_control = FALSE
    ),
    quiet = TRUE,
    data = df_group_predic %>%
      filter(!is.na(group_predic_choose_trans)) %>%
      filter(discuss_type %in% c("control", "discussion_full")) %>%
      ungroup
  ) %>%
    get_coeff("discussion_full")

  # 3. Effect of observing on discrimination (omitted category is public_non_observer)
  effect_of_sobs_on_discrimination <- feols_custom(
    r2_choose_trans ~ video_type_placebo + video_type_treatment + control + public_observer + discussion_pair + discussion_full + factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = df_r2 %>% filter(phase == "phase_2") %>% filter(!is.na(r2_choose_trans)),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    ),
    quiet = TRUE
  ) %>%
    get_coeff("public_observer")

  print(str_glue("Effect of observing on discrimination: {effect_of_sobs_on_discrimination}"))

  implied_effect <- (effect_of_discussion_sobs / effect_of_public_sobs) * effect_of_sobs_on_discrimination

  return(implied_effect)
}

estimate_effect_of_sobs(group_predic, r2_choices_num)

# Make a bootstrap function
single_bootstrap_effect_of_sobs <- function(df_group_predic, df_r2, i) {
  print(i)

  # For each phase x treatment, sample a selection of IDs
  sampled_ids <- df_group_predic %>%
    select(group_id, discuss_type, phase) %>%
    distinct() %>%
    group_by(discuss_type, phase) %>%
    slice_sample(prop = 1, replace = TRUE) %>%
    mutate(new_group_id = paste0("new_group_id_", row_number()))

  # Get new datasets
  df_group_predic_new <- sampled_ids %>%
    left_join(df_group_predic, by = c("group_id" = "group_id", "phase", "discuss_type"), relationship = "many-to-many") %>%
    mutate(group_id = new_group_id) %>%
    ungroup

  df_r2_new <- sampled_ids %>%
    left_join(df_r2, by = c("group_id" = "group_id", "phase", "discuss_type"),  relationship = "many-to-many") %>%
    mutate(group_id = new_group_id) %>%
    ungroup

  estimate_effect_of_sobs(df_group_predic_new, df_r2_new)
}

group_predic_slim <- group_predic %>%
  select(group_id, discuss_type, phase, public, discussion_pair, discussion_full, item_diff, reliability_diff, reliability_shown, stratum_id, video_type,
         any_of(control_vars), group_predic_choose_trans)

r2_slim <- r2_choices_num %>%
  select(group_id, discuss_type, phase, public_observer, discussion_pair, item_diff, discussion_full, stratum_id, video_type, delivery_incentive_exp, comparator_order_in_pair,
         r2_reliability_diff, pair_includes_trans, r2_reliability_shown, control, video_type_placebo, video_type_treatment, r2_choose_trans)

# point estimate
estimate_effect_of_sobs(group_predic_slim, r2_slim)

n_boot <- 100
set.seed(12345)

boot_out_sobs <- map_dbl(1:n_boot, ~ single_bootstrap_effect_of_sobs(
  group_predic_slim,
  r2_slim,
  i = .x
))

boot_out_sobs %>% quantile(0.025) 
boot_out_sobs %>% quantile(0.975) 

# 2SLS effect of SOBs on discrimination -----------------------------------

# Amalgamate SOBs to the individual level
group_predic_ind <- group_predic %>%
  ungroup %>%
  select(ind_id, treat_type_r2, group_predic_choose_trans) %>%
  count_prop(treat_type_r2) %>%
  group_by(ind_id) %>%
  summarise(group_predic_choose_trans = mean_na(group_predic_choose_trans))

# Merge onto Y
r2_choices_num_w_group_predic <- r2_choices_num %>%
  left_join(group_predic_ind, by = "ind_id")

# Run a 2SLS, IV regression
feols_custom(
  fml = r2_choose_trans ~ factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown | group_predic_choose_trans ~ public_observer,
  data = r2_choices_num_w_group_predic %>% filter(treat_type_r2 %in% c("control", "choices_observer"), phase == "phase_2"),
  fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
  cluster = "group_id"
)

# Distribution of [own pref - perceived other pref] ----------------------------------------------------------------------------------------------------------------

group_predic_ind <- group_predic_w_actual_choices %>%
  group_by(ind_id) %>%
  summarise(group_predic_choose_trans = mean_na(group_predic_choose_trans)) %>%
  count_prop(group_predic_choose_trans)

r2_ind <- r2_choices_num %>%
  group_by(ind_id) %>%
  summarise(r2_choose_trans = mean_na(r2_choose_trans)) %>%
  count_prop(r2_choose_trans) %>%
  ungroup

full_join(group_predic_ind, r2_ind, by = "ind_id") %>%
  mutate(pref_diff = r2_choose_trans - group_predic_choose_trans) %>%
  count_prop(pref_diff) %>%
  hist_basic(pref_diff)


# I want to see
# (1) How does group composition affect WTSAY - i.e. having one activist in the group is sufficient
#
# (2)

# Group predic - phase 2 ---------------------------------------------------------------

group_predic_long %>% count_prop(name)

r2_choices_num %>% filter(public_observer %in% 1) %>% filter(phase == "phase_2") %>% get_mean("r2_choose_trans") # 46%
group_predic_long %>% filter(name == "Actual\nchoices") %>% count_prop(public_observer) %>% filter(public_observer == 1) %>% 
count_prop(group_predic_includes_trans, r2_includes_trans, r2_trans, group_predic_trans, name) %>% 
get_mean("value") # 39%

# USE ALL CHOICES INSTEAD
group_predic_long_alt <- list(
  group_predic_w_member_id %>% mutate(name = "Predicted choices\n(within group)", value = group_predic_choose_trans),
  r2_choices_num  %>% mutate(
    name = "Actual\nchoices",
    value = r2_choose_trans
  )
) %>% 
map(~ select(., -group_label, -round)) %>% 
bind_rows() %>% 
mutate(group_predic_includes_trans = !is.na(value)) %>% 
ungroup %>% 
  mutate(
    phase_2_group_predic_type = case_when(
      discuss_type %in% c("control") ~ "No discussion (private)",
      discuss_type %in% c("choosing_only") & group_predic_member_is_observer == 1 & name == "Predicted choices\n(within group)" ~ "No discussion (public)\n(predictions about\nobservers)",
      discuss_type %in% c("choosing_only") & group_predic_member_is_non_observer == 1 & name == "Predicted choices\n(within group)" ~ "No discussion (public)\n(predictions about\nnon-observers)",
      discuss_type %in% c("choosing_only") & public_observer == 1 & name == "Actual\nchoices" ~ "No discussion (public)\n(predictions about\nobservers)",
      discuss_type %in% c("choosing_only") & public_non_observer == 1 & name == "Actual\nchoices" ~ "No discussion (public)\n(predictions about\nnon-observers)",
      discuss_type %in% c("discussion_pair") & group_predic_member_is_speaker == 1 & name == "Predicted choices\n(within group)" ~ "2-person discussion\n(predictions about speakers)",
      discuss_type %in% c("discussion_pair") & group_predic_member_is_listener == 1 & name == "Predicted choices\n(within group)" ~ "2-person discussion\n(predictions about listeners)",
      discuss_type %in% c("discussion_pair") & is_speaker %in% 1 & name == "Actual\nchoices" ~ "2-person discussion\n(predictions about speakers)",
      discuss_type %in% c("discussion_pair") & is_listener %in% 1 & name == "Actual\nchoices" ~ "2-person discussion\n(predictions about listeners)",
      discuss_type %in% c("discussion_full") ~ "3-person discussion"
    ) %>%
      factor() %>%
      fct_relevel("No discussion (private)", 
                 "No discussion (public)\n(predictions about\nobservers)", 
                 "No discussion (public)\n(predictions about\nnon-observers)", 
                 "2-person discussion\n(predictions about speakers)", 
                 "2-person discussion\n(predictions about listeners)", 
                 "3-person discussion")
  )

# Create a list of p-values for each phase_2_group_predic_type
p_vals_group_predic <- group_predic_long_alt %>%
  filter(group_predic_includes_trans) %>%
  filter(!is.na(phase_2_group_predic_type)) %>%
  split(.$phase_2_group_predic_type) %>%
  map(~ feols_custom(
    fml = value ~ name, fixef = NULL, cluster = ~ group_id,
    data = .
  )) %>%
  map_dfr(~ tidy(.), .id = "phase_2_group_predic_type") %>%
  filter(term == "namePredicted choices\n(within group)") %>%
  mutate(
    p_value = p.value,
    stars = case_when(
      p_value < 0.01 ~ "***",
      p_value < 0.05 ~ "**",
      p_value < 0.1 ~ "*",
      TRUE ~ ""
    ),
    label = paste0(
      "Diff: ",
      format(round(100 * estimate, 1), nsmall = 1),
      "p.p.\n",
      "p-value: ",
      ifelse(p.value < 0.001, "<0.001", 
      ifelse(p.value < 0.005, format(round(p.value, 3), nsmall = 3),
      format(round(p.value, 2), nsmall = 2)))
    )
  )

# Display the p-values
p_vals_group_predic

-(model_misper$estimate[[1]] / norm_models[[2]]$coefficients["discussion_full"]) %>%
  write_percentage("outputs/stats/misperc_accounting.tex", digits = 0)


p_vals_group_predic %>% 
  filter(phase_2_group_predic_type == "No discussion (private)") %>% 
  pull(estimate) %>% 
  times_100 %>% 
  {. * -1} %>% 
  write_stat("outputs/stats/misper_diff_control.tex", digits = 1)


# Get corresponding p-value for 3-person discussion
p_vals_group_predic %>% 
  filter(phase_2_group_predic_type == "3-person discussion") %>% 
  pull(estimate) %>% 
  times_100 %>% 
  write_stat("outputs/stats/misper_diff_treat.tex", digits = 1)

# Get p-values for misperception in control and treatment groups
p_vals_group_predic %>% 
  filter(phase_2_group_predic_type == "No discussion (private)") %>% 
  pull(p_value) %>% 
  write_stat("outputs/stats/pval_misper_control_alt.tex", digits = 2, p_value = TRUE)

p_vals_group_predic %>% 
  filter(phase_2_group_predic_type == "3-person discussion") %>% 
  pull(p_value) %>% 
  write_stat("outputs/stats/pval_misper_treat_alt.tex", digits = 2, p_value = TRUE)

# Get misperception statistics for 2-person discussion (speakers)
p_vals_group_predic %>% 
  filter(phase_2_group_predic_type == "2-person discussion\n(predictions about speakers)") %>% 
  pull(estimate) %>% 
  times_100 %>% 
  write_stat("outputs/stats/misper_diff_pair_speakers.tex", digits = 1)

# Get p-value for misperception in 2-person discussion (speakers)
p_vals_group_predic %>% 
  filter(phase_2_group_predic_type == "2-person discussion\n(predictions about speakers)") %>% 
  pull(p_value) %>% 
  write_stat("outputs/stats/pval_misper_pair_speakers.tex", digits = 2, p_value = TRUE)

group_predic_long_alt %>%

  filter(group_predic_includes_trans) %>%
  bar_chart(x = phase_2_group_predic_type, y = value, fill = name, return_data = TRUE) %>%
  filter(!is.na(phase_2_group_predic_type)) %>% 
  left_join(p_vals_group_predic %>% select(phase_2_group_predic_type, label)) %>%
  mutate(
    phase_2_group_predic_type = phase_2_group_predic_type %>%
      fct_relevel("No discussion (private)", 
                 "No discussion (public)\n(predictions about\nobservers)", 
                 "No discussion (public)\n(predictions about\nnon-observers)", 
                 "2-person discussion\n(predictions about speakers)", 
                 "2-person discussion\n(predictions about listeners)", 
                 "3-person discussion") %>%
      fct_label_append("\n") %>%
      fct_label_append(label)
  ) %>%
  mutate(name = name %>% factor() %>% fct_rev()) %>% 

  ggplot(aes(x = name, y = y, fill = name)) +
  geom_col(aes(colour = name), position = position_dodge(1), width = 0.8, show.legend = FALSE) +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), colour = "#333333", alpha = 0.9, width = 0.2, position = position_dodge(1)) +
  facet_wrap(~phase_2_group_predic_type, nrow = 1) +
  scale_color_discrete(type = c("#956cae", "#956cae")) +
  scale_fill_discrete(type = c("white", "#956cae")) +
  coord_cartesian(ylim = c(0, 0.7)) +
  scale_y_continuous(labels = scales::percent, expand = c(0, 0)) +
  labs(x = element_blank(), y = "% chose the transgender worker", fill = element_blank()) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major = element_blank(),
    axis.line = element_line(),
    axis.ticks = element_line(),
    panel.spacing = unit(0.7, "cm")
  ) +
  geom_text(
    aes(x = name, y = 0.03,
        label = str_glue("{format(round(as.numeric(y) * 100, 1), nsmall = 1)}%")),
  )

# Adjust the width to accommodate the additional panel
ggsave("outputs/figs/group_predic_phase_2.pdf", width = 12, height = 4, scale = 1)
group_predic_w_actual_choices %>% count_prop(treat_type_r2)

misper_public <- feols_custom(
   fml = misper ~ 1,
   fixef = NULL,
   cluster = ~group_id,
   data = group_predic_w_actual_choices %>% filter(treat_type_r2 %in% c("choices_control", "choices_observer"))
 )

# group_predic_w_actual_choices |> count_prop(treat_type_r2)

misper_public$coefficients[[1]] %>% times_100 %>% write_stat("outputs/stats/misper_public.tex", digits = 1)
misper_public %>% get_p_val("(Intercept)") %>% write_stat("outputs/stats/misper_public_pval.tex", digits = 2, p_value = TRUE)

# Difference between prediction about listeners and speakers

model_group_predic_listeners_vs_speakers <- feols_custom(
  fml = group_predic_choose_trans ~ control + public + group_predic_member_is_listener,
  fixef = c("stratum_id", "video_type"),
  cluster = ~ group_id,
  lasso = TRUE,
  lasso_options = list(
    potential_controls = control_vars,
    t = c("discussion_full", "control", "public", "group_predic_member_is_speaker"),
    interact = NULL,
    group_control = FALSE
  ),
  data = group_predic_w_actual_choices %>% filter(phase == "phase_2") %>%
    filter(group_predic_includes_trans) %>%
    mutate(group_predic_member_is_speaker = as.numeric(group_predic_member_is_speaker %in% 1),
           group_predic_member_is_listener = as.numeric(group_predic_member_is_listener %in% 1)),
)

model_group_predic_listeners_vs_speakers %>%
  get_p_val("group_predic_member_is_listener") %>%
  write_stat("outputs/stats/p_val_group_predic_listener_vs_speakers.tex", digits = 2, p_value = TRUE)

model_group_predic_listeners_vs_speakers %>%
  get_coeff("group_predic_member_is_listener") %>%
{. * -100} %>%
  write_stat("outputs/stats/diff_group_predic_listener_vs_speakers.tex", digits = 1)

# P-value of public on group_predic
feols_custom(
  fml = group_predic_choose_trans ~ public_non_observer + public_observer + discussion_full + discussion_pair,
  fixef = c("stratum_id", "video_type", "phase"),
  cluster = ~ group_id,
  lasso = TRUE,
  lasso_options = list(
    potential_controls = control_vars,
    t = c("discussion_full", "discussion_pair", "public"),
    interact = NULL,
    group_control = FALSE
  ),
  data = group_predic_w_actual_choices %>% filter(phase == "phase_2") %>%
    mutate(discussion_pair = ifelse(is.na(discussion_pair), 0, discussion_pair)) %>%
    filter(group_predic_includes_trans) %>%
    mutate(group_predic_member_is_speaker = as.numeric(group_predic_member_is_speaker %in% 1),
           group_predic_member_is_listener = as.numeric(group_predic_member_is_listener %in% 1)),
)

group_predic_w_actual_choices %>% filter(group_predic_member_is_observer == 1) %>% get_mean("group_predic_choose_trans") # 39\%

# Group predic accuracy ---------------------------------------------------------------

group_predic_accuracy_main_sample <- group_predic_w_actual_choices %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>%
  mutate(group_predic_includes_trans_label = ifelse(group_predic_includes_trans,
                                                    "Choice includes transgender",
                                                    "Choice doesn't include transgender")) %>%
  bar_chart(x = discuss_type_label, y = correct_predic, perc = TRUE, fill = group_predic_includes_trans_label) +
  labs(x = element_blank(), y = "% of correct predictions", fill = element_blank()) + theme_bw() +
  scale_y_continuous(labels = scales::percent, expand = c(0, 0), limits = c(0, 0.75))

group_predic_accuracy_phase_2 <- group_predic_w_actual_choices %>%
  filter(phase == "phase_2") %>%
  mutate(group_predic_includes_trans_label = ifelse(group_predic_includes_trans,
                                                    "Choice includes transgender",
                                                    "Choice doesn't include transgender")) %>%
  bar_chart(x = discuss_type_label, y = correct_predic, perc = TRUE, fill = group_predic_includes_trans_label) +
  labs(x = element_blank(), y = "% of correct predictions", fill = element_blank()) + theme_bw() +
  scale_y_continuous(labels = scales::percent, expand = c(0, 0), limits = c(0, 0.75))

ggarrange(
  group_predic_accuracy_main_sample,
  group_predic_accuracy_phase_2,
  ncol = 2,
  nrow = 1,
  widths = c(3, 5),
  labels = c("3-person discussion sample", "Phase 2 sample"),
  common.legend = TRUE,
  label.x = c(-0.15, 0.01),
  label.y = c(0.96)
)

ggsave("outputs/figs/group_predic_accuracy.pdf", width = 10, height = 5, scale = 1)

# Group predic - video ---------------------------------------------------------------

group_predic_long %>%
  mutate(
    phase_2_group_predic_type = case_when(
      discuss_type %in% c("control") ~ "No discussion (private)",
      discuss_type %in% c("choosing_only") ~ "No discussion (public)",
      discuss_type %in% c("discussion_pair") & group_predic_member_is_speaker == 1 ~ "2-person discussion\n(predictions about speakers)",
      discuss_type %in% c("discussion_pair") & group_predic_member_is_listener == 1 ~ "2-person discussion\n(predictions about listeners)",
      discuss_type %in% c("discussion_full") ~ "3-person discussion"
    ) %>%
      factor() %>%
      fct_relevel("No discussion (private)", "No discussion (public)", "2-person discussion\n(predictions about speakers)", "2-person discussion\n(predictions about listeners)", "3-person discussion") %>%
      fct_rev()
  ) %>%

  filter(group_predic_includes_trans) %>%
  filter(phase == "phase_2") %>%
  bar_chart(x = video_type, y = value, fill = name, return_data = TRUE) %>%
  ggplot(aes(x = name, y = y, fill = name)) +
  geom_col(aes(colour = name), position = position_dodge(1), width = 0.8, show.legend = FALSE) +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), colour = "#333333", alpha = 0.9, width = 0.2, position = position_dodge(1)) +
  facet_wrap(~video_type, nrow = 1) +
  scale_color_discrete(type = c("#956cae", "#956cae")) +
  scale_fill_discrete(type = c("#956cae", "white")) +
  coord_cartesian(ylim = c(0, 0.7)) +
  scale_y_continuous(labels = scales::percent, expand = c(0, 0)) +
  labs(x = element_blank(), y = "% chose the transgender worker", fill = element_blank()) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major = element_blank(),
    axis.line = element_line(),
    axis.ticks = element_line(),
    panel.spacing = unit(0.7, "cm")
  ) +
  geom_text(
    aes(x = name, y = 0.03,
        label = str_glue("{format(round(as.numeric(y) * 100, 1), nsmall = 1)}%")),
  )

# R1 choices ---------------------------------------------------------------

r1_summ <- r1_choices %>%
  filter(!is.na(pair_includes_trans)) %>%
  group_by(treat_type_r1, treat_type_r1_label, pair_includes_trans) %>%
  filter(treat_type_r1 %in% c("control", "discussion_full")) %>%
  summarise(mean_cl_boot(r1_choose_comparator),
            n_obs = n(),
            n_ind = n_distinct(ind_id),
            n_groups = n_distinct(group_id)
  ) %>%
  mutate(
    pair_includes_trans_label = ifelse(pair_includes_trans, "Worker\nis trans", "Worker\nis non-trans") %>% paste0(., "\n(Obs=", n_obs, ")")
  )


r1_summ %>%
  filter(!is.na(treat_type_r1)) %>%
  filter(treat_type_r1 %in% c("control", "discussion_full")) %>%
  ggplot(aes(x = pair_includes_trans_label, y = y, ymin = ymin, ymax = ymax, fill = pair_includes_trans)) +
  geom_col(show.legend = FALSE, position = position_dodge(0.8)) +
  geom_errorbar(width = 0.2, position = position_dodge(0.8)) +
  labs(y = "Probability of selecting the worker", fill = element_blank(), x = element_blank()) +
  geom_text(aes(y = 0.03, label = str_glue("{round(y, 3)*100}%")), position = position_dodge(0.8), size = 3) +
  facet_wrap(~ treat_type_r1_label, scales = "free_x", nrow = 1) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major = element_blank(),
    axis.line = element_line(),
    axis.ticks = element_line(),
    panel.spacing = unit(0.7, "cm")
  )

ggsave("outputs/figs/r1_bar.pdf", width = 4.5, height = 6, scale = 1)

# R1 - 3-person discussion table ---------------------------------------------------------------

models_r1 <- list(
  "Chose worker in\ntreatment round (=1)" = feols_custom(
    r1_choose_comparator ~ pair_includes_trans * (discussion_full),
    data = r1_choices %>% filter(discuss_type %in% c("control", "discussion_full")),
    fixef = NULL,
    cluster = "group_id",
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = "^pair_includes_trans$|(^pair_includes_trans\\:discussion_full$)|^discussion_full$|item_diff|r2_reliability_diff|r2_reliability_shown",

  ),
  "Chose worker in\ntreatment round (=1)" = feols_custom(
    r1_choose_comparator ~ pair_includes_trans_alt + pair_includes_trans_alt * (video_type + discussion_full + factor(stratum_id)) + item_diff + quality_diff * quality_included,
    data = r1_choices %>% filter(discuss_type %in% c("control", "discussion_full")),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("discussion_full"),
      interact = "pair_includes_trans",
      group_control = FALSE
    ),
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = "^pair_includes_trans$|(^pair_includes_trans_alt\\:discussion_full$)|^discussion_full$",

  ),
  "Chose trans in\ntreatment round (=1)\n(pairs with trans only)" = feols_custom(
    r1_choose_comparator ~ (pair_includes_trans) * (video_type + discussion_full + factor(stratum_id)) + item_diff + quality_diff * quality_included,
    data = r1_choices %>% filter(discuss_type %in% c("control", "discussion_full"), !is.na(r1_choose_trans)),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("discussion_full"),
      interact = "pair_includes_trans",
      group_control = FALSE
    ),
    ri = ri,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = "^pair_includes_trans$|(^pair_includes_trans_alt\\:discussion_full$)|^discussion_full$",

  )
)

#  For RI - needs to be just numeric indicator variables...

r1_choices %>% filter(discuss_type %in% c("control", "discussion_full")) %>%
  mutate(pair_includes_trans = as.logical(pair_includes_trans)) %>%
  bar_chart(x = discuss_type, fill = pair_includes_trans, y = r1_choose_comparator)

tex_export(
  models_r1,
  file = "outputs/tables/r1_main.tex",
  coef_rename = coef_label,
  coef_reorder = "pair_includes_trans:discussion_full",
  gof_map = fe_label_no_fe,
  coef_omit = vars_to_regex(c("stratum_id|Intercept|video_type|dq12|phase|pair_includes_trans_alt$|item|reliability|quality",
                              control_vars_except_items_reliability
  )),
  dep_means = list(
    "Mean: No discussion (private) + worker is non-trans" = "discuss_type == 'control' & pair_includes_trans == FALSE",
    "Mean: No discussion (private) + worker is T" = "discuss_type == 'control' & pair_includes_trans == TRUE"
  ),
  stat_vec = "({std.error}) [{p.value}]",
  add_rows = list_to_add_rows(control_rows)
)

# R1 - point graph ---------------------------------------------------------------

r1_choices %>%
  filter(discuss_type %in% c("control", "discussion_full")) %>%
  mutate(discussion_full = as.numeric(discuss_type == "discussion_full")) %>%
  mutate(
    control_non_trans = !pair_includes_trans & discussion_full == 0,
    treat_non_trans = !pair_includes_trans & discussion_full == 1,
    control_trans = pair_includes_trans & discussion_full == 0,
    treat_trans = pair_includes_trans & discussion_full == 1,
  ) %>%
  mutate(across(c(control_non_trans, treat_non_trans, control_trans, treat_trans), as.numeric)) %>%

  feols_custom(
    r1_choose_comparator ~ (control_trans + treat_non_trans + treat_trans),
    data = .,
    fixef = c("stratum_id", "video_type", "comparator_order_in_pair"),
    cluster = "group_id",
    stratum = "stratum_id",
  ) %>%

  tidy_90 %>%

  filter(str_detect(term, "(control_trans|treat_non_trans|treat_trans)$")) %>%
  add_row(term = "omitted", estimate = 0, std.error = 0) %>%
  mutate(
    pair_includes_trans = str_detect(term, "control_trans|treat_trans"),
    discuss = str_detect(term, "treat")
  ) %>%

  mutate(
    discuss_label = ifelse(discuss, "Group discussion", "Control") %>%
      fct_relevel("Control"),
    pair_includes_trans_label = ifelse(pair_includes_trans, "Worker\nis trans", "Worker\nis non-trans")
  ) %>%

  ggplot(aes(x = pair_includes_trans_label, colour = pair_includes_trans)) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "skyblue") +
  geom_errorbar(aes(ymin = conf.low_90, ymax = conf.high_90), position = position_dodge(0.8), show.legend = F, width = 0, size = 1.2) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), position = position_dodge(0.8), show.legend = F, width = 0, size = 0.7) +
  geom_point(aes(y = estimate), position = position_dodge(0.8), show.legend = F, size = 3) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major = element_blank()) +
  labs(x = element_blank(), y = "Effect on probability of selecting worker\n in treatment round") +
  facet_wrap(~ discuss_label, scales = "free_x")

ggsave("outputs/figs/r1_point.pdf", width = 4.5, height = 3.5, scale = 1)

# R1 choices - phase 2 ---------------------------------------------------------------

r1_choices %>%
  filter(!is.na(pair_includes_trans)) %>%
  group_by(treat_type_r1, treat_type_r1_label, pair_includes_trans) %>%
  filter(phase == "phase_2") %>%
  summarise(mean_cl_boot(r1_choose_comparator),
            n_obs = n(),
            n_ind = n_distinct(ind_id),
            n_groups = n_distinct(group_id)
  ) %>%
  mutate(
    pair_includes_trans_label = ifelse(pair_includes_trans, "Worker\nis trans", "Worker\nis non-trans") %>% paste0(., "\n(Obs=", n_obs, ")")
  ) %>%

  filter(!is.na(treat_type_r1)) %>%
  ggplot(aes(x = pair_includes_trans_label, y = y, ymin = ymin, ymax = ymax, fill = as.logical(pair_includes_trans))) +
  geom_col(show.legend = FALSE, position = position_dodge(0.8)) +
  geom_errorbar(width = 0.2, position = position_dodge(0.8)) +
  labs(y = "Probability of selecting the worker", fill = element_blank(), x = element_blank()) +
  geom_text(aes(y = 0.03, label = str_glue("{round(y, 3)*100}%")), position = position_dodge(0.8), size = 3) +
  facet_wrap(~ treat_type_r1_label, scales = "free_x", nrow = 1) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major = element_blank(),
    axis.line = element_line(),
    axis.ticks = element_line(),
    panel.spacing = unit(0.7, "cm")
  )
ggsave("outputs/figs/r1_bar_phase2.pdf", width = 10, height = 6, scale = 1)

# R1 phase 2 - table ---------------------------------------------------------------

models_r1_phase2 <- list(
  "Chose worker in\ntreatment round (=1)" = feols_custom(
    r1_choose_comparator ~ pair_includes_trans * (public + discussant),
    data = r1_choices %>% filter(phase == "phase_2", !(is_listener %in% 1)) %>% count_prop(public, discussant),
    fixef = NULL,
    cluster = "group_id",
    ri = FALSE,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_omit = "stratum_id|Intercept|video"
  ),
  "Chose worker in\ntreatment round (=1)" = feols_custom(
    r1_choose_comparator ~ pair_includes_trans_alt + pair_includes_trans_alt * (video_type + public + discussant + factor(stratum_id)) + item_diff + quality_diff * quality_included,
    data = r1_choices %>% filter(phase == "phase_2", !(is_listener %in% 1)) %>% count_prop(public, discussant),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("public", "discussant"),
      interact = "pair_includes_trans",
      group_control = FALSE
    ),
    ri = FALSE,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_omit = "stratum_id|Intercept|video"
  ),
  "Chose trans in\ntreatment round (=1)\n(pairs with trans only)" = feols_custom(
    r1_choose_comparator ~ pair_includes_trans_alt + (pair_includes_trans) * (video_type + public + discussant + factor(stratum_id)) + item_diff + quality_diff * quality_included,
    data = r1_choices %>% filter(phase == "phase_2", !(is_listener %in% 1), !is.na(r1_choose_trans)) %>% count_prop(public, discussant),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("public", "discussant"),
      interact = "pair_includes_trans",
      group_control = FALSE
    ),
    ri = FALSE,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_omit = "stratum_id|Intercept|video"
  )
)

p_vals_r1_phase_2 <- models_r1_phase2 %>%
  purrr::map(
    get_comparison_p_vals, dummy_vars = c("control", "public", "discussant"),
    ri = FALSE,
    n_sims = 20, stratum = "stratum_id", var_types = var_types_spec,
    coef_keep = "control|public|discuss"
  ) %>%
  purrr::map(filter_interact_terms, vars = c("control", "public", "discussant"), interact = "pair_includes_trans(_alt)?") %>%
  map(filter, row_number() == 2) %>%
  map(~ .x %>% mutate(across(c(base, term), coef_label)))

tex_export(
  models_r1_phase2,
  file = "outputs/tables/r1_phase_2.tex",
  coef_rename = coef_label,
  gof_map = fe_label_no_fe,
  coef_omit = vars_to_regex("stratum_id|Intercept|video_type|dq12|pair_includes_trans_alt$|item|reliability|quality", control_vars_except_items_reliability),
  dep_means = list(
    "Mean: No discussion (private) + worker is non-trans" = "discuss_type == 'control' & pair_includes_trans == FALSE",
    "Mean: No discussion (private) + worker is T" = "discuss_type == 'control' & pair_includes_trans == TRUE"
  ),
  stat_vec = "({std.error}) [{p.value}]",
  coef_reorder = c("pair_includes_trans:discussant", "pair_includes_trans:public", "pair_includes_trans", "discussant", "public"),
  add_rows = combine_add_rows(
    list_to_add_rows(control_rows),
    p_vals_to_add_rows(p_vals_r1_phase_2)
  )
)

# p-value speaker vs full
models_r1_phase2[[1]] %>%
  tidy() %>%
  filter(term == "pair_includes_trans:public") %>%
  pull(p.value) %>%
  write_stat("outputs/stats/p_val_r1_public.tex", digits = 2)

r1_choices %>% filter(phase == "phase_2") %>%
  mutate(pair_includes_trans = fct_cross(factor(pair_includes_trans), factor(pair_includes_female))) %>%
  bar_chart(x = treat_type_r1_pooled, y = r1_choose_comparator, fill= pair_includes_trans)

# Penalty for trans in R1:
trans_penalty_r1 <- c(
  models_r1[[1]]$coefficients["pair_includes_trans"],
  models_r1_phase2[[1]]$coefficients["pair_includes_trans"]
) %>%
{. * (-100)}

trans_penalty_r1 %>% min() %>% write_stat("outputs/stats/effect_trans_r1_min.tex", digits = 1)
trans_penalty_r1 %>% max() %>% write_stat("outputs/stats/effect_trans_r1_max.tex", digits = 1)


(models_r1[[1]]$coefficients["pair_includes_trans:discussion_full"] * 100) %>% write_stat("outputs/stats/effect_discussion_r1.tex", digits = 0)

((models_r1[[1]]$coefficients["pair_includes_trans"] + models_r1[[1]]$coefficients["pair_includes_trans:discussion_full"]) * 100) %>%
  write_stat("outputs/stats/effect_trans_discussion_r1.tex", digits = 0)

models_r1_phase2[[1]] %>% get_p_val("pair_includes_trans:public")


# ...... ---------------------------------------------------------------


# Confounders - treatment effects ---------------------------------------------------------------

set.seed(12345)
confounder_models <- list(
  "Remembered word\n'transgender' (=1)\n(Phase 1 only)" = feols_custom(
    trans_remembered ~ discussion_full + prop_salience_minus_trans,
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp"),
    cluster = "group_id",
    data = salience,
    ri = FALSE,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = vars_to_regex(c("discussion_full", "prop_salience_minus_trans")),
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = NULL,
      group_control = TRUE
    )
  ),
  "Correctly guess purpose (=1)\n(after main outcome)" = feols_custom(
    purpose_0_trans ~ discussion_full,
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "phase"),
    cluster = "group_id",
    data = purpose %>% filter(discuss_type %in% c("discussion_full", "control")),
    ri = FALSE,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = vars_to_regex(c("discussion_full", "prop_salience_minus_trans")),
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = NULL,
      group_control = TRUE
    )
  ),
  "Correctly guess purpose (=1)\n(end of experiment)" = feols_custom(
    purpose_2_trans ~ discussion_full,
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "phase"),
    cluster = "group_id",
    data = purpose %>% filter(discuss_type %in% c("discussion_full", "control")),
    ri = FALSE,
    n_sims = ri_sims,
    stratum = "stratum_id",
    var_types = var_types_spec,
    coef_keep = vars_to_regex(c("discussion_full", "prop_salience_minus_trans")),
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = NULL,
      group_control = TRUE
    )
  )
)

tex_export(
  confounder_models,
  file = "outputs/tables/confounders.tex",
  coef_omit = vars_to_regex("stratum_id|Intercept|video_type", control_vars),
  coef_rename = coef_label,
  gof_omit = "AIC|IC|RMSE|Std|R2 Adj|FE:",
  dep_means = list(
    "Mean: No discussion (private)" = "discuss_type == 'control'",
    "Mean: 3-person discussion" = "discuss_type == 'discussion_full'"
  ),
  add_rows = list_to_add_rows(
    list("Controls" = c("X", "X", "X"))
  ),
  fmt = 3
)

purpose %>% filter(discuss_type %in% c("discussion_full", "control")) %>%
  get_mean(purpose_0_trans) %>%
  write_percentage("outputs/stats/purpose_0_mean.tex", digits = 0)

purpose %>% filter(discuss_type %in% c("discussion_full", "control")) %>%
  get_mean(purpose_2_trans) %>%
  write_percentage("outputs/stats/purpose_2_mean.tex", digits = 0)

# Confounders - treatment effect interactions ---------------------------------------------------------------

confounder_interactions <- list(
  "Phases 1 + 2" = feols_custom(
    r2_choose_trans ~ discussion_full * purpose_0_trans + video_type  + factor(stratum_id),
    data = r2_with_purpose %>% filter(!is.na(r2_choose_trans)) %>% filter(discuss_type %in% c("discussion_full", "control")),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = NULL,
      group_control = TRUE
    )
  ),
  "Phases 1 + 2" = feols_custom(
    r2_choose_trans ~ discussion_full * purpose_2_trans + video_type  + factor(stratum_id),
    data = r2_with_purpose %>% filter(!is.na(r2_choose_trans)) %>% filter(discuss_type %in% c("discussion_full", "control")),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = NULL,
      group_control = TRUE
    )
  ),
  "Phase 1 only" = feols_custom(
    r2_choose_trans ~ discussion_full * high_sdb + video_type  + factor(stratum_id),
    data = r2_with_sdb %>% filter(!is.na(r2_choose_trans)) %>%
      filter(discuss_type %in% c("discussion_full", "control")) %>%
      filter(phase == "phase_1"),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = NULL,
      group_control = TRUE
    )
  ),
  "Phase 1 only" = feols_custom(
    r2_choose_trans ~ discussion_full * (trans_remembered) + above_median_prop_salience + video_type  + factor(stratum_id),
    data = r2_with_salience %>% filter(!is.na(r2_choose_trans), phase == "phase_1") %>% filter(discuss_type %in% c("discussion_full", "control")),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = NULL,
      group_control = TRUE
    )
  )
)

tex_export(
  confounder_interactions,
  file = "outputs/tables/confounder_interactions.tex",
  coef_omit = vars_to_regex("stratum_id|Intercept|video_type|items", control_vars),
  coef_rename = coef_label,
  stat_vec = "long",
  gof_map = fe_label_no_fe, 
  add_rows = list_to_add_rows(
    list("Controls" = c("X", "X", "X", "X"))
  ),
  additional_header = vec_to_custom_header(
    c(" ", rep("Chose trans in private outcome round (=1)", 4))
  )
)

# Anonymous choices ---------------------------------------------------------------

anon_models <- list(
  "Chose worker in\nprivate pick-up round (=1)" = feols_custom(
    anon_choose_comparator ~ pair_includes_trans * (discussion_full+ discussion_pair_speaker + discussion_pair_listener + public),
    data = anon_choices %>% filter(phase == "phase_2"),
    fixef = NULL,
    cluster = "group_id",
    stratum = "stratum_id",
    coef_omit = "stratum_id|Intercept|video"
  ),
  "Chose worker in\nprivate pick-up round (=1)" = feols_custom(
    anon_choose_comparator ~ pair_includes_trans_alt + pair_includes_trans_alt * (discussion_full + discussion_pair_speaker + discussion_pair_listener + public + stratum_id + video_type + delivery_incentive_exp),
    data = anon_choices %>% filter(phase == "phase_2"),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("discussion_full"),
      interact = "pair_includes_trans",
      group_control = FALSE
    ),
    stratum = "stratum_id",
    coef_omit = "stratum_id|Intercept|video"
  ),
  "Chose trans in\nprivate pick-up round (=1)\n(trans pairs only)" = feols_custom(
    anon_choose_comparator ~ video_type_placebo + video_type_treatment + discussion_full + discussion_pair_speaker + discussion_pair_listener + public + factor(stratum_id) + delivery_incentive_exp,
    data = anon_choices %>% filter(phase == "phase_2") %>% filter(pair_includes_trans==1),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = c("discussion_full", "discussion_pair_speaker", "discussion_pair_listener", "public"),
      interact = NULL,
      group_control = FALSE
    ),
    stratum = "stratum_id",
    coef_omit = "stratum_id|Intercept|video"
  )
)

anon_choices %>% filter(discuss_type %in% c("control", "discussion_full")) %>%
  mutate(pair_includes_trans = factor(pair_includes_trans)) %>%
  bar_chart(x = discussion_full, y = anon_choose_comparator, fill = pair_includes_trans, n_label = TRUE)
anon_choices %>% filter(treat_type_r2 %in% c("control", "discussion_full", "discussion_listener")) %>%
  mutate(pair_includes_trans = factor(pair_includes_trans)) %>%
  bar_chart(x = treat_type_r2, y = anon_choose_comparator, fill = pair_includes_trans, n_label = TRUE)

tex_export(
  anon_models,
  file = "outputs/tables/anon_choices.tex",
  coef_rename = coef_label,
  coef_reorder = c("pair_includes_trans", "pair_includes_trans:discussion_full", "discussion_full",
                   "pair_includes_trans:discussion_pair_speaker", "discussion_pair_speaker",
                   "pair_includes_trans:discussion_pair_listener", "discussion_pair_listener",
                   "pair_includes_trans:public", "public"),
  gof_map = fe_label_no_fe,
  coef_omit = vars_to_regex("stratum_id|Intercept|video_type|delivery|b11|bachelors|pair_includes_trans_alt$|dq12", control_vars),
  dep_means = list(
    "Mean: No discussion (private) + worker is non-trans" = "discuss_type == 'control' & pair_includes_trans == FALSE",
    "Mean: No discussion (private) + worker is trans" = "discuss_type == 'control' & pair_includes_trans == TRUE"
  ),
  add_rows = list_to_add_rows(control_rows)
)

(anon_models[[1]]$coefficients["pair_includes_trans"] * -100) %>%
  write_stat("outputs/stats/anon_trans_penalty.tex", digits = 1)

(anon_models[[1]]$coefficients["pair_includes_trans:discussion_full"] * 100) %>%
  write_stat("outputs/stats/anon_effect.tex", digits = 1)

anon_models[[1]] %>% get_p_val("pair_includes_trans:discussion_full") %>%
  write_stat("outputs/stats/anon_effect_p.tex", digits = 3, p_value = TRUE)

anon_models[[2]] %>% get_coeff("pair_includes_trans_alt:discussion_pair_listener") %>%
  times_100 %>%
  write_stat("outputs/stats/anon_effect_listener.tex", digits = 0)

anon_models[[2]] %>% get_p_val("pair_includes_trans_alt:discussion_pair_listener") %>%
  write_p_val("outputs/stats/anon_effect_listener_p.tex", digits = 2)

anon_choices %>%
  filter(pair_includes_trans == 1) %>%
  filter(phase == "phase_2") %>%
  bar_chart(x = treat_type_r2_label, y = anon_choose_comparator, flip = TRUE)

r1_choices %>%
  filter(pair_includes_trans == 1) %>%
  group_by(discuss_type) %>%
  count_prop(r1_choose_trans, return_count = TRUE)

tibble(
  x = rnorm(1000),
  y = rnorm(1000)
) %>%
  mutate(sum = x + y,
         diff = x - y) %>%
  lm()

# ........ ---------------------------------------------------------------

# Mediation analysis ---------------------------------------------------------------

r2_with_group_predic <- r2_choices_num %>%
  left_join(
    group_predic %>% group_by(ind_id) %>% summarise(group_predic_choose_trans = mean_na(group_predic_choose_trans)),
    by = "ind_id"
  )

r2_with_att <- r2_choices_num %>%
  left_join(
    atts_long %>% group_by(ind_id) %>% summarise(attitude_val = mean_na(attitude_val)),
    by = "ind_id"
  )

r2_with_reliability <- r2_choices_num %>%
  left_join(
    likelihood %>% filter(trans_photo_yn==1) %>% select(ind_id, likely),
    by = "ind_id"
  )

# First create the combined dataset
r2_choices_combined <- r2_choices_num %>%
  # Join with sn for n3
  left_join(sn %>% select(ind_id, n3), by = "ind_id") %>%
  count_prop(n3) %>%
  # Join for group predictions (using the same logic as r2_with_group_predic)
  left_join(
    group_predic %>%
      group_by(ind_id) %>%
      summarise(group_predic_choose_trans = mean_na(group_predic_choose_trans)),
    by = "ind_id"
  ) %>%
  # Join for attitudes (using same logic as r2_with_att)
  left_join(
    atts_long %>%
      group_by(ind_id) %>%
      summarise(attitude_val = mean_na(attitude_val)),
    by = "ind_id"
  ) %>%
  # Join for reliability (using same logic as r2_with_reliability)
  left_join(
    likelihood %>%
      filter(trans_photo_yn==1) %>%
      select(ind_id, likely),
    by = "ind_id"
  ) %>%
  # Join for laws
  left_join(laws_atts, by = "ind_id", suffix = c("", "_laws")) %>%
  mutate(k4_illegal = as.numeric(k4_illegal))


mediation <- list(
  feols_custom(
    r2_choose_trans ~  video_type_placebo + video_type_treatment + n3+discussion_full + factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_choices_num %>%
      left_join(sn %>% select(ind_id, n3), by = "ind_id") %>%
      count_prop(n3) %>%
      filter(discuss_type %in% c("control", "discussion_full")),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    )
  ),
  feols_custom(
    r2_choose_trans ~  video_type_placebo + video_type_treatment + group_predic_choose_trans  + discussion_full + factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_group_predic %>%
      filter(discuss_type %in% c("control", "discussion_full")),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    )
  ),
  feols_custom(
    r2_choose_trans ~  video_type_placebo + video_type_treatment + attitude_val  + discussion_full + factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_att %>%
      filter(discuss_type %in% c("control", "discussion_full")),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    )
  ),
  feols_custom(
    r2_choose_trans ~  video_type_placebo + video_type_treatment + likely  + discussion_full + factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_reliability %>%
      filter(discuss_type %in% c("control", "discussion_full")),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    )
  ),
  feols_custom(
    r2_choose_trans ~  video_type_placebo + video_type_treatment + discussion_full +
      n3 + group_predic_choose_trans + attitude_val + likely +
      factor(stratum_id) + delivery_incentive_exp + item_diff +
      r2_reliability_diff * r2_reliability_shown,
    data = r2_choices_combined %>% filter(discuss_type %in% c("control", "discussion_full")),
    fixef = c("stratum_id", "delivery_incentive_exp", "comparator_order_in_pair", "phase", "video_type"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    )
  )
) %>%
  set_names(rep("Dep var: Chose trans in private outcome round", 5))

tex_export(
  mediation,
  file = "outputs/tables/mediation.tex",
  coef_rename = coef_label,
  gof_map = fe_label_no_fe,
  coef_reorder = c("discussion_full"),
  coef_omit = vars_to_regex(c("stratum_id", "Intercept", "video", "group_control", "pair_includes_trans_alt", control_vars, "r2_reliability", "item_diff", "delivery")),
  dep_means = list(
    "Mean: No discussion (private)" = "discuss_type == 'control'",
    "Mean: No discussion (private)" = "discuss_type == 'control'"
  ),
  stat_vec = "long"
)

# Base model is the model with no additional controls
base_model <-   feols_custom(
    r2_choose_trans ~  video_type_placebo + video_type_treatment + discussion_full + factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_att %>%
      filter(discuss_type %in% c("control", "discussion_full")),
    fixef = c("stratum_id", "video_type", "delivery_incentive_exp", "comparator_order_in_pair", "phase"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    )
  )

mediation[[3]]

# Run the Wald test to compare the models
wald_result <- lmtest::waldtest(base_model, mediation[[3]], test = "F")

# Print the results
print(wald_result)

# Extract coefficients and standard errors
coef_base <- coef(base_model)["discussion_full"]  # Replace "X" with your variable
coef_mediation <- coef(mediation[[3]])["discussion_full"]

# Use vcov() to get the variance-covariance matrices
se_base <- sqrt(vcov(base_model)["discussion_full", "discussion_full"])
se_mediation <- sqrt(vcov(mediation[[3]])["discussion_full", "discussion_full"])

# Calculate difference and its standard error
diff <- coef_base - coef_mediation
se_diff <- sqrt(se_base^2 + se_mediation^2)  # Simple approach

# Calculate t-statistic and p-value
t_stat <- diff / se_diff
df2 <- min(df.residual(base_model), df.residual(mediation[[3]]))
p_value <- 2 * pt(-abs(t_stat), df = df2)

# Print results
cat("Coefficient difference:", diff, "\n")
cat("t-statistic:", t_stat, "\n")
cat("p-value:", p_value, "\n")

# Export p-value
write_p_val(p_value, "outputs/stats/mediation_attitude_p.tex")

# Mediation - videos ----------------------------------------------------------------------------------------------------------------

mediation_videos <- list(
  feols_custom(
    r2_choose_trans ~  video_type_placebo + video_type_treatment + factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_choices_num,
    fixef = c("stratum_id", "delivery_incentive_exp", "comparator_order_in_pair", "phase", "treat_type_r2_label"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    )
  ),
  feols_custom(
    r2_choose_trans ~  video_type_placebo + video_type_treatment + n3 + factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_choices_num %>%
      left_join(sn %>% select(ind_id, n3), by = "ind_id") %>%
      count_prop(n3),
    fixef = c("stratum_id", "delivery_incentive_exp", "comparator_order_in_pair", "phase", "treat_type_r2_label"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    )
  ),
  feols_custom(
    r2_choose_trans ~  video_type_placebo + video_type_treatment + group_predic_choose_trans  + factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_group_predic,
    fixef = c("stratum_id", "delivery_incentive_exp", "comparator_order_in_pair", "phase", "treat_type_r2_label"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    )
  ),
  feols_custom(
    r2_choose_trans ~  video_type_placebo + video_type_treatment + attitude_val  + factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_att,
    fixef = c("stratum_id", "delivery_incentive_exp", "comparator_order_in_pair", "phase", "treat_type_r2_label"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    )
  ),
  feols_custom(
    r2_choose_trans ~  video_type_placebo + video_type_treatment + likely  + factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_with_reliability,
    fixef = c("stratum_id", "delivery_incentive_exp", "comparator_order_in_pair", "phase", "treat_type_r2_label"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    )
  ),
  feols_custom(
    r2_choose_trans ~  video_type_placebo + video_type_treatment + k4_illegal  + factor(stratum_id) + delivery_incentive_exp + item_diff + r2_reliability_diff * r2_reliability_shown ,
    data = r2_choices_combined,
    fixef = c("stratum_id", "delivery_incentive_exp", "comparator_order_in_pair", "phase", "treat_type_r2_label"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    )
  ),
  feols_custom(
    r2_choose_trans ~  video_type_placebo + video_type_treatment +
      n3 + group_predic_choose_trans + attitude_val + likely + k4_illegal +
      factor(stratum_id) + delivery_incentive_exp + item_diff +
      r2_reliability_diff * r2_reliability_shown,
    data = r2_choices_combined,
    fixef = c("stratum_id", "delivery_incentive_exp", "comparator_order_in_pair", "phase", "treat_type_r2_label"),
    cluster = "group_id",
    lasso = TRUE,
    lasso_options = list(
      potential_controls = control_vars,
      t = "discussion_full",
      interact = "pair_includes_trans",
      group_control = TRUE
    )
  )
)

mediation_videos %>%
  set_names(rep("Dep var: Chose trans in private outcome round (=1)", length(mediation_videos))) %>%
  tex_export(
    file = "outputs/tables/mediation_videos.tex",
    coef_rename = coef_label,
    gof_map = fe_label_no_fe,
    coef_omit = vars_to_regex(c("stratum_id", "Intercept", "group_control", "pair_includes_trans_alt", control_vars, "r2_reliability", "item_diff", "delivery")),
    dep_means = list(
      "Mean: No discussion (private)" = "discuss_type == 'control'",
      "Mean: No discussion (private)" = "discuss_type == 'control'"
    ),
    stat_vec = "long"
  )

# Combined mechanisms (treatment round and outcome round) ----------------------------------------------------------------------------------------------------------------

# Combined group prediction figure
group_predic_long %>%
  # Filter for only the panels we want
  filter(
    (discuss_type %in% c("control", "discussion_full")) | # From first figure
    (discuss_type == "choosing_only" & phase == "phase_2") # From second figure (public only)
  ) %>%
  filter(group_predic_includes_trans) %>%
  
  # Create treatment labels
  mutate(
    treatment_label = case_when(
      discuss_type == "control" ~ "No discussion (private)",
      discuss_type == "choosing_only" ~ "No discussion (public)",
      discuss_type == "discussion_full" ~ "3-person discussion"
    ) %>%
      factor() %>%
      fct_relevel("No discussion (private)", "No discussion (public)", "3-person discussion")
  ) %>%
  
  # Generate bar chart data
  bar_chart(x = treatment_label, y = value, fill = name, return_data = TRUE) %>%
  
  # Add misperception statistics
  left_join(
    bind_rows(
      model_misper %>% select(discuss_type, label),
      p_vals_group_predic_non_trans %>% 
        filter(discuss_type == "choosing_only") %>% 
        select(discuss_type, label)
    ) %>%
      mutate(
        treatment_label = case_when(
          discuss_type == "control" ~ "No discussion (private)",
          discuss_type == "choosing_only" ~ "No discussion (public)", 
          discuss_type == "discussion_full" ~ "3-person discussion"
        )
      )
  ) %>%
  
  # Create plot
  ggplot(aes(x = name, y = y, fill = name)) +
  geom_col(aes(colour = name), position = position_dodge(1), width = 0.8, show.legend = FALSE) +
  geom_errorbar(aes(ymin = ymin, ymax = ymax), colour = "#333333", alpha = 0.9, width = 0.2, position = position_dodge(1)) +
  facet_wrap(~treatment_label, nrow = 1) +
  scale_color_discrete(type = c("#956cae", "#956cae")) +
  scale_fill_discrete(type = c("#956cae", "white")) +
  coord_cartesian(ylim = c(0, 0.7)) +
  scale_y_continuous(labels = scales::percent, expand = c(0, 0)) +
  labs(x = element_blank(), y = "% chose transgender", fill = element_blank()) +
  theme_minimal() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major = element_blank(),
    axis.line = element_line(),
    axis.ticks = element_line(),
    panel.spacing = unit(0.7, "cm")
  ) +
  geom_text(
    aes(x = name, y = 0.03,
        label = str_glue("{format(round(as.numeric(y) * 100, 1), nsmall = 1)}%"))
  )

ggsave("outputs/figs/group_predic_combined.pdf", width = 6.75, height = 4, scale = 1)
